## Compare predicted scores with hand-coded labels for one topic.
##
## Side effects: prints `.main`, draws a tpr-vs-fpr curve plus the a=0, b=1
## reference line on the current graphics device, and prints an xtable of the
## confusion table.
##
## Arguments:
##   .x     predicted scores; a score >= 0.5 counts as a positive prediction
##          in the confusion table.
##   .y     hand-coded values aligned with `.x`; entries that are NA are
##          dropped (together with the matching `.x`), the rest are
##          dichotomised at >= 0.5.
##   .main  label printed to the console and used as the plot title.
##
## Returns the object produced by calibration(y_f ~ x, cuts = 5).
##
## NOTE(review): prediction()/performance() look like ROCR and
## confusionMatrix()/calibration() like caret -- confirm against the
## library() calls at the top of the file.
perf_calibrate <- function(
                           .x = ordered_docs$politics_pred,
                           .y = ordered_docs$politics,
                           .main = "Politics"
                           ) {
    print(.main)

    ## keep only the documents that were hand coded
    coded <- !is.na(.y)
    x <- .x[coded]
    y <- as.integer(.y[coded] >= 0.5)

    ## ROC-style curve with the diagonal as reference
    roc_curve <- performance(prediction(x, y), "tpr", "fpr")
    plot(roc_curve, main = .main)
    abline(a = 0, b = 1)

    ## "1" is listed first so that it is the first factor level.
    ## The names `x` and `y_f` are used in the calibration formula below;
    ## do not rename them without checking how the result is labelled.
    x_f <- factor(as.integer(x >= 0.5), levels = c("1", "0"))
    y_f <- factor(y, levels = c("1", "0"))

    conf_matrix <- confusionMatrix(data = x_f, y_f)

    ## Sum row keeps the raw column totals; the 2x2 cells are replaced by
    ## within-column proportions rounded to two decimals.
    conf_matrix_with_sums <- addmargins(conf_matrix$table, 1)
    conf_matrix_with_sums[1:2, 1:2] <- round(
        prop.table(conf_matrix$table, 2),
        2
    )
    print(xtable(conf_matrix_with_sums))

    ## print(
    ##     conf_matrix$overall["Kappa"]
    ## )

    calibration(y_f ~ x, cuts = 5)
}


cat("\n#### ####")
cat("\n#### Table A10\n")

## One-row, three-panel figure with one perf_calibrate() plot per topic.
## Each perf_calibrate() call also prints that topic's confusion table, and
## its return value is kept for the calibration plots drawn further down.
## The output file name depends on the `glove_embeddings` flag, which is
## expected to be a single TRUE/FALSE defined earlier in the script.
pdf(
    if (glove_embeddings) {
        "figs/model_fit_plots_glove_replicate_psrm.pdf"
    } else {
        "figs/FigureA8_model_fit_plots_replicate_psrm.pdf"
    },
    height = 2.5
)
par(mfrow = c(1, 3))
entertainment_cal <- perf_calibrate(
    .x = ordered_docs$entertainment_pred,
    .y = ordered_docs$entertainment,
    .main = "Entertainment"
)
## title spelled out instead of relying on a trailing empty argument
## falling back to the function default
politics_cal <- perf_calibrate(
    .x = ordered_docs$politics_pred,
    .y = ordered_docs$politics,
    .main = "Politics"
)
social_justice_cal <- perf_calibrate(
    .x = ordered_docs$social_justice_pred,
    .y = ordered_docs$social_justice,
    .main = "Social Justice"
)
dev.off()

## Calibration plots for the three topics, written to a single 3x3 inch PDF.
## Uses the objects returned by perf_calibrate() above; the file name
## depends on the `glove_embeddings` flag (a single TRUE/FALSE).
pdf(
    if (glove_embeddings) {
        "figs/calibration_plots_glove_replicate_psrm.pdf"
    } else {
        "figs/FigureA7_calibration_plots_replicate_psrm.pdf"
    },
    width = 3, height = 3
)
## explicit print() so the plot objects are drawn on the device
print(xyplot(entertainment_cal, main = "Entertainment"))
print(xyplot(politics_cal, main = "Politics"))
print(xyplot(social_justice_cal, main = "Social Justice"))
dev.off()


####  ####  ####
####  ####  ####
####  ####  ####
####  ####  ####

#### evaluate hand coding
options(stringsAsFactors = FALSE)

## Read the hand-coded labels with every column as character. The full
## output file is used when it is present, otherwise the file that (going
## by its name) carries only ids and labels.
df <- read.csv(
    if (file.exists("data/fulloutput_900x3_450LR.csv")) {
        "data/fulloutput_900x3_450LR.csv"
    } else {
        "data/fulloutput_900x3_450LR_ids_and_labels.csv"
    },
    colClasses = "character"
)

## load() is expected to bring `just_ids_900` into the workspace; its
## tweet_id_NEW column becomes the character join key.
load("data/just_ids_900.Rda")
just_ids_900$tweet_id <- as.character(just_ids_900$tweet_id_NEW)

## Attach the id lookup to the labels, then overwrite the key with
## tweet_id_ORIG.
labeled_tweets <- df %>%
    left_join(just_ids_900, by = "tweet_id")
labeled_tweets$tweet_id <- labeled_tweets$tweet_id_ORIG

## Labels as a factor; the level set is remembered in `orig_levels`.
df$category <- factor(df$category)
orig_levels <- levels(df$category)

## Relabel every distinct tweet text with an integer code.
df$tweet_text <- factor(df$tweet_text)
levels(df$tweet_text) <- seq_along(levels(df$tweet_text))

## Reshape long labels (one row per tweet and rating) into one row per
## tweet_id with the three ratings in columns X1, X2, X3.
##
## The three ratings of a tweet are placed in randomly chosen slots. The
## rank(sample(...), ties.method = "random") expression is kept as written so
## the number of random draws, and hence the RNG state afterwards, is the
## same as before.
##
## `id_cols` names only tweet_id: `r` is already the `names_from` column, and
## current tidyr raises an error when a column is selected by both.
ratings_wide <- function(labels) {
    ## slots are drawn from 1:3, so each tweet must have exactly 3 labels
    labels_per_tweet <- table(labels$tweet_id, useNA = "ifany")
    if (any(labels_per_tweet != 3)) {
        stop(
            "expected exactly 3 labels per tweet_id; ",
            sum(labels_per_tweet != 3),
            " tweet_id value(s) have a different count",
            call. = FALSE
        )
    }
    data.frame(
        labels %>%
        arrange(tweet_text) %>%
        group_by(tweet_id) %>%
        mutate(
            r = paste0("X", rank(sample(1:3), ties.method = "random"))
        ) %>%
        ungroup() %>%
        select(tweet_id, category, r) %>%
        pivot_wider(
            id_cols = tweet_id,
            names_from = r,
            values_from = category
        )
    )
}

rater_cols <- c("X1", "X2", "X3")

## all tweets
dfw <- ratings_wide(df)

cat("\n#### ####")
cat("\n#### Kappa (Table A8): ")
print(kappam.fleiss(dfw[, rater_cols], detail = TRUE)$value)
cat("\n#### ####")
cat("\n#### Alpha (Table A8): ")
## kripp.alpha() is given the transpose: one row per rating slot
print(kripp.alpha(t(dfw[, rater_cols]), method = "nominal")$value)

## per-category kappa (first column of $detail), tweets with party == "right"
dfw <- ratings_wide(df %>% filter(party == "right"))
kappa_right <- kappam.fleiss(dfw[, rater_cols], detail = TRUE)$detail[, 1]

## per-category kappa, tweets with party == "left"
dfw <- ratings_wide(df %>% filter(party == "left"))
kappa_left <- kappam.fleiss(dfw[, rater_cols], detail = TRUE)$detail[, 1]


cat("\n#### ####")
cat("\n#### Kappa left vs right troll (Table A8): \n")
print(xtable(round(data.frame(
    ## kappa_overall,
    kappa_left,
    kappa_right
    ), 2)))
